This R script generates timecourse plots of mean fixation proportion, and performs a statistical analysis to determine divergence points between fixations on different objects. It is adapted from the R script in the paper “Analysing data from the psycholinguistic visual‑world paradigm: Comparison of different analysis methods”, Ito & Knoeferle (2023).
The columns in the original eye-tracking data set are:
TOI: trial of interest
Participant: the code for a participant
Timeline: the version of the experiment
Bin: the number of the time bin for one participant and trial
Bin_duration: the duration of each time bin
Competitor: binomially coded fixation data - 1=fixation on competitor object in the current time bin; 0=fixation elsewhere
Distractor: binomially coded fixation data - 1=fixation on distractor object in the current time bin; 0=fixation elsewhere
Target: binomially coded fixation data - 1=fixation on target object in the current time bin; 0=fixation elsewhere
TimeMS: the time bin converted to time in milliseconds
Animacy_C: the animacy of the competitor noun
Animacy_D: the animacy of the distractor noun
Animacy_T: the animacy of the target noun
Determiner: the type of determiner in the trial (Definite or Possessive)
N_Gender_C: the lexical gender of the competitor noun (Masculine or Feminine)
N_Gender_D: the lexical gender of the distractor noun (Masculine or Feminine)
N_Gender_T: the lexical gender of the target noun (Masculine or Feminine)
Phonol_Overlap_C: whether the competitor noun has a minimal pair that only differs in the word-final vowel and lexical gender
Phonol_Overlap_D: whether the distractor noun has a minimal pair that only differs in the word-final vowel and lexical gender
Phonol_Overlap_T: whether the target noun has a minimal pair that only differs in the word-final vowel and lexical gender
ArticleOnset: the onset time (in milliseconds) of the determiner in the audio cue of the current trial
NounOnset: the onset time (in milliseconds) of the noun in the audio cue of the current trial
EndingOnset: the onset time (in milliseconds) of the noun’s final vowel in the audio cue of the current trial
EndingOffset: the offset time (in milliseconds) of the noun’s final vowel in the audio cue of the current trial
Remaining: the remaining time after the offset of the audio cue of the current trial
SpeakerType: the bilingual status of participant - 1 (Adult Spanish Immigrant) or 2 (Heritage Spanish Speaker)
Sex: the sex of the participant (Femenino or Masculino)
Age: the age of the participant
Position: the position of the object in fixation (top left, bottom centre, top right)
Trackloss: whether the fixation is in blank space or off screen, or there is a blink in the current time bin (TRUE or FALSE)
TimeMS_adjusted: TimeMS zeroed at ArticleOnset
ArticleOnset_adjusted: ArticleOnset zeroed at ArticleOnset (thus always 0)
NounOnset_adjusted: NounOnset zeroed at ArticleOnset
EndingOnset_adjusted: EndingOnset zeroed at ArticleOnset
EndingOffset_adjusted: EndingOffset zeroed at ArticleOnset
Remaining_adjusted: Remaining zeroed at ArticleOnset
rm(list = ls()) # Clear the workspace
options(scipen = 999) # Turn off scientific notation
require(Rmisc)
require(tidyverse)
require(ggplot2)
require(boot)
require(dplyr)
require(lme4)
## lme4 Matrix boot lubridate forcats stringr dplyr purrr
## "1.1-37" "1.7-0" "1.3-30" "1.9.4" "1.0.0" "1.5.1" "1.1.4" "1.0.4"
## readr tidyr tibble ggplot2 tidyverse Rmisc plyr lattice
## "2.1.5" "1.3.1" "3.2.1" "3.5.2" "2.0.0" "1.5.1" "1.8.9" "0.22-6"
fix.dat = read_delim("/Users/cindyzhang/Desktop/Spanish Gender Project/Spanish Gender Project/data-2025-03-12/df_cleaned.csv", delim=',')
# all the trackloss bins have been removed - in any given row, there is a fixation in one of the 3 AreaFixated
summary(fix.dat)
## ...1 TOI Participant Timeline
## Min. : 1 Length:732647 Length:732647 Length:732647
## 1st Qu.:183162 Class :character Class :character Class :character
## Median :366324 Mode :character Mode :character Mode :character
## Mean :366324
## 3rd Qu.:549486
## Max. :732647
##
## Bin Bin_duration Competitor Distractor
## Min. : 1.0 Min. : 1.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:181.0 1st Qu.:10.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :342.0 Median :10.000 Median :0.0000 Median :0.0000
## Mean :338.2 Mean : 9.991 Mean :0.2076 Mean :0.1891
## 3rd Qu.:499.0 3rd Qu.:10.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :729.0 Max. :10.000 Max. :1.0000 Max. :1.0000
##
## Target TimeMS Animacy_C Animacy_D
## Min. :0.0000 Min. : 10 Length:732647 Length:732647
## 1st Qu.:0.0000 1st Qu.:1810 Class :character Class :character
## Median :1.0000 Median :3420 Mode :character Mode :character
## Mean :0.6033 Mean :3381
## 3rd Qu.:1.0000 3rd Qu.:4990
## Max. :1.0000 Max. :7283
##
## Animacy_T Determiner N_ending_C N_ending_D
## Length:732647 Length:732647 Length:732647 Length:732647
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## N_ending_T N_Gender_C N_Gender_D N_Gender_T
## Length:732647 Length:732647 Length:732647 Length:732647
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Phonol_Overlap_C Phonol_Overlap_D Phonol_Overlap_T ArticleOnset
## Length:732647 Mode:logical Length:732647 Min. :2403
## Class :character NA's:732647 Class :character 1st Qu.:2448
## Mode :character Mode :character Median :2479
## Mean :2490
## 3rd Qu.:2524
## Max. :2679
##
## NounOnset EndingOnset EndingOffset Remaining SpeakerType
## Min. :2496 Min. :2591 Min. :2838 Min. :3005 Min. :1.00
## 1st Qu.:2576 1st Qu.:2762 1st Qu.:3020 1st Qu.:3144 1st Qu.:1.00
## Median :2613 Median :2859 Median :3079 Median :3230 Median :2.00
## Mean :2619 Mean :2877 Mean :3107 Mean :3239 Mean :1.54
## 3rd Qu.:2669 3rd Qu.:2972 3rd Qu.:3196 3rd Qu.:3315 3rd Qu.:2.00
## Max. :2807 Max. :3411 Max. :3546 Max. :3656 Max. :2.00
##
## Sex Age Position Trackloss
## Length:732647 Min. :19.00 Length:732647 Mode :logical
## Class :character 1st Qu.:20.00 Class :character FALSE:732647
## Mode :character Median :23.00 Mode :character
## Mean :23.75
## 3rd Qu.:25.00
## Max. :50.00
## NA's :142383
## TimeMS_adjusted ArticleOnset_adjusted NounOnset_adjusted
## Min. :-2670.0 Min. :0 Min. : 70.0
## 1st Qu.: -690.0 1st Qu.:0 1st Qu.:113.0
## Median : 930.0 Median :0 Median :128.0
## Mean : 886.2 Mean :0 Mean :128.3
## 3rd Qu.: 2490.0 3rd Qu.:0 3rd Qu.:144.0
## Max. : 4750.0 Max. :0 Max. :202.0
##
## EndingOnset_adjusted EndingOffset_adjusted Remaining_adjusted
## Min. :170.0 Min. :365.0 Min. : 540.0
## 1st Qu.:276.0 1st Qu.:544.0 1st Qu.: 668.0
## Median :388.0 Median :593.0 Median : 738.0
## Mean :386.3 Mean :616.7 Mean : 748.9
## 3rd Qu.:459.0 3rd Qu.:694.0 3rd Qu.: 818.0
## Max. :773.0 Max. :908.0 Max. :1018.0
##
## GenderMatchTarget_D
## Min. :0.0000
## 1st Qu.:0.0000
## Median :1.0000
## Mean :0.5251
## 3rd Qu.:1.0000
## Max. :1.0000
##
head(fix.dat)
## # A tibble: 6 × 40
## ...1 TOI Participant Timeline Bin Bin_duration Competitor Distractor
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 Trial01-d… TA105 VersionA 23 10 0 0
## 2 2 Trial01-d… TA105 VersionA 24 10 0 0
## 3 3 Trial01-d… TA105 VersionA 25 10 0 0
## 4 4 Trial01-d… TA105 VersionA 26 10 0 0
## 5 5 Trial01-d… TA105 VersionA 27 10 0 0
## 6 6 Trial01-d… TA105 VersionA 28 10 0 0
## # ℹ 32 more variables: Target <dbl>, TimeMS <dbl>, Animacy_C <chr>,
## # Animacy_D <chr>, Animacy_T <chr>, Determiner <chr>, N_ending_C <chr>,
## # N_ending_D <chr>, N_ending_T <chr>, N_Gender_C <chr>, N_Gender_D <chr>,
## # N_Gender_T <chr>, Phonol_Overlap_C <chr>, Phonol_Overlap_D <lgl>,
## # Phonol_Overlap_T <chr>, ArticleOnset <dbl>, NounOnset <dbl>,
## # EndingOnset <dbl>, EndingOffset <dbl>, Remaining <dbl>, SpeakerType <dbl>,
## # Sex <chr>, Age <dbl>, Position <chr>, Trackloss <lgl>, …
fix.dat <- fix.dat %>% filter(Bin_duration == 10)
fix.dat <- fix.dat %>%
mutate(AreaFixated = case_when(
Competitor == 1 ~ "Competitor",
Distractor == 1 ~ "Distractor",
Target == 1 ~ "Target"
))
fix.dat <- fix.dat %>%
mutate(GenderMatch = case_when(
N_Gender_D == N_Gender_T ~ "D=T",
N_Gender_D == N_Gender_C ~ "D=C"
))
In the original experiment, the eyetracker takes 25 samples in each 50ms bin, and records the proportion of samples that fall in each AOI. Our eyetracker takes 1 sample in each 10ms bin, and records which AOI the sample falls in (we already removed the trackloss bins, so the sample must be in one of the 3 AOIs, and binomially encoded as 0 or 1).
fix.long <- fix.dat %>%
pivot_longer(
cols = c(Target, Competitor, Distractor),
names_to = "AOI",
values_to = "FixP"
)
Participant, TOI,
AreaFixated, SpeakerType, AOI,
Phonol_Overlap_T and GenderMatch to
factorfix.long = fix.long %>% mutate_at(vars(Participant, TOI, AreaFixated, SpeakerType, AOI, Phonol_Overlap_T, GenderMatch), as.factor)
fix.long$Phonol_Overlap <- fix.long$Phonol_Overlap_T %>% fct_collapse(NonMinPair = c("First Syll-unstressed", "First Syll-stressed"))
head(fix.long)
## # A tibble: 6 × 42
## ...1 TOI Participant Timeline Bin Bin_duration TimeMS Animacy_C Animacy_D
## <dbl> <fct> <fct> <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 1 Tria… TA105 VersionA 23 10 230 Animate-… Animate-…
## 2 1 Tria… TA105 VersionA 23 10 230 Animate-… Animate-…
## 3 1 Tria… TA105 VersionA 23 10 230 Animate-… Animate-…
## 4 2 Tria… TA105 VersionA 24 10 240 Animate-… Animate-…
## 5 2 Tria… TA105 VersionA 24 10 240 Animate-… Animate-…
## 6 2 Tria… TA105 VersionA 24 10 240 Animate-… Animate-…
## # ℹ 33 more variables: Animacy_T <chr>, Determiner <chr>, N_ending_C <chr>,
## # N_ending_D <chr>, N_ending_T <chr>, N_Gender_C <chr>, N_Gender_D <chr>,
## # N_Gender_T <chr>, Phonol_Overlap_C <chr>, Phonol_Overlap_D <lgl>,
## # Phonol_Overlap_T <fct>, ArticleOnset <dbl>, NounOnset <dbl>,
## # EndingOnset <dbl>, EndingOffset <dbl>, Remaining <dbl>, SpeakerType <fct>,
## # Sex <chr>, Age <dbl>, Position <chr>, Trackloss <lgl>,
## # TimeMS_adjusted <dbl>, ArticleOnset_adjusted <dbl>, …
summary(fix.long)
## ...1 TOI Participant
## Min. : 1 Trial30-pos-sapo_0 : 28779 TA131 : 81555
## 1st Qu.:183183 Trial41-def-mujer_0 : 28770 TA124 : 80841
## Median :366326 Trial06-def-brujo_0 : 28491 TA128 : 79215
## Mean :366328 Trial08-def-puerta_0: 28377 TA142 : 76869
## 3rd Qu.:549482 Trial10-pos-oso_0 : 28374 TA130 : 76527
## Max. :732646 Trial29-pos-mosca_0 : 28338 TA137 : 76455
## (Other) :2022921 (Other):1722588
## Timeline Bin Bin_duration TimeMS
## Length:2194050 Min. : 1.0 Min. :10 Min. : 10
## Class :character 1st Qu.:180.0 1st Qu.:10 1st Qu.:1800
## Mode :character Median :342.0 Median :10 Median :3420
## Mean :337.6 Mean :10 Mean :3376
## 3rd Qu.:498.0 3rd Qu.:10 3rd Qu.:4980
## Max. :728.0 Max. :10 Max. :7280
##
## Animacy_C Animacy_D Animacy_T Determiner
## Length:2194050 Length:2194050 Length:2194050 Length:2194050
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## N_ending_C N_ending_D N_ending_T N_Gender_C
## Length:2194050 Length:2194050 Length:2194050 Length:2194050
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## N_Gender_D N_Gender_T Phonol_Overlap_C Phonol_Overlap_D
## Length:2194050 Length:2194050 Length:2194050 Mode:logical
## Class :character Class :character Class :character NA's:2194050
## Mode :character Mode :character Mode :character
##
##
##
##
## Phonol_Overlap_T ArticleOnset NounOnset EndingOnset
## First Syll-stressed :636219 Min. :2403 Min. :2496 Min. :2591
## First Syll-unstressed:827898 1st Qu.:2448 1st Qu.:2576 1st Qu.:2762
## Minimal Pair :729933 Median :2479 Median :2613 Median :2859
## Mean :2490 Mean :2619 Mean :2877
## 3rd Qu.:2524 3rd Qu.:2669 3rd Qu.:2972
## Max. :2679 Max. :2807 Max. :3411
##
## EndingOffset Remaining SpeakerType Sex Age
## Min. :2838 Min. :3005 1:1008648 Length:2194050 Min. :19.0
## 1st Qu.:3020 1st Qu.:3144 2:1185402 Class :character 1st Qu.:20.0
## Median :3079 Median :3230 Mode :character Median :23.0
## Mean :3107 Mean :3239 Mean :23.7
## 3rd Qu.:3196 3rd Qu.:3315 3rd Qu.:25.0
## Max. :3546 Max. :3656 Max. :50.0
## NA's :426369
## Position Trackloss TimeMS_adjusted ArticleOnset_adjusted
## Length:2194050 Mode :logical Min. :-2670.0 Min. :0
## Class :character FALSE:2194050 1st Qu.: -690.0 1st Qu.:0
## Mode :character Median : 920.0 Median :0
## Mean : 880.9 Mean :0
## 3rd Qu.: 2480.0 3rd Qu.:0
## Max. : 4750.0 Max. :0
##
## NounOnset_adjusted EndingOnset_adjusted EndingOffset_adjusted
## Min. : 70.0 Min. :170.0 Min. :365.0
## 1st Qu.:113.0 1st Qu.:276.0 1st Qu.:544.0
## Median :128.0 Median :388.0 Median :593.0
## Mean :128.3 Mean :386.3 Mean :616.7
## 3rd Qu.:144.0 3rd Qu.:459.0 3rd Qu.:694.0
## Max. :202.0 Max. :773.0 Max. :908.0
##
## Remaining_adjusted GenderMatchTarget_D AreaFixated GenderMatch
## Min. : 540.0 Min. :0.0000 Competitor: 456042 D=C:1041837
## 1st Qu.: 668.0 1st Qu.:0.0000 Distractor: 415515 D=T:1152213
## Median : 738.0 Median :1.0000 Target :1322493
## Mean : 748.9 Mean :0.5252
## 3rd Qu.: 818.0 3rd Qu.:1.0000
## Max. :1018.0 Max. :1.0000
##
## AOI FixP Phonol_Overlap
## Competitor:731350 Min. :0.0000 NonMinPair :1464117
## Distractor:731350 1st Qu.:0.0000 Minimal Pair: 729933
## Target :731350 Median :0.0000
## Mean :0.3333
## 3rd Qu.:1.0000
## Max. :1.0000
##
FixP for each time
bin, AOI, SpeakerType, Determiner and TOIfix.means = Rmisc::summarySE(fix.long, measurevar='FixP', groupvars=c('TimeMS_adjusted','SpeakerType', 'AOI', 'Determiner'), na.rm=T)
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
head(fix.means)
## TimeMS_adjusted SpeakerType AOI Determiner N FixP sd se ci
## 1 -2670 1 Competitor Definite 5 0 0 0 0
## 2 -2670 1 Distractor Definite 5 0 0 0 0
## 3 -2670 1 Target Definite 5 1 0 0 0
## 4 -2670 2 Competitor Definite 4 0 0 0 0
## 5 -2670 2 Distractor Definite 4 0 0 0 0
## 6 -2670 2 Target Definite 4 1 0 0 0
summary(fix.means)
## TimeMS_adjusted SpeakerType AOI Determiner
## Min. :-2670.0 1:4011 Competitor:2793 Length:8379
## 1st Qu.: -900.0 2:4368 Distractor:2793 Class :character
## Median : 850.0 Target :2793 Mode :character
## Mean : 853.9
## 3rd Qu.: 2590.0
## Max. : 4750.0
##
## N FixP sd se
## Min. : 1.0 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:256.0 1st Qu.:0.1065 1st Qu.:0.2959 1st Qu.:0.01722
## Median :285.0 Median :0.3152 Median :0.4342 Median :0.02610
## Mean :261.9 Mean :0.3333 Mean :0.3775 Mean :0.02597
## 3rd Qu.:303.0 3rd Qu.:0.3711 3rd Qu.:0.4724 3rd Qu.:0.02857
## Max. :364.0 Max. :1.0000 Max. :0.5774 Max. :0.33333
## NA's :132 NA's :132
## ci
## Min. :0.00000
## 1st Qu.:0.03390
## Median :0.05136
## Mean :0.05370
## 3rd Qu.:0.05625
## Max. :1.43422
## NA's :132
fix.means.phonol = Rmisc::summarySE(fix.long, measurevar='FixP', groupvars=c('TimeMS_adjusted','SpeakerType', 'AOI', 'Determiner', 'Phonol_Overlap'), na.rm=T)
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
head(fix.means.phonol)
## TimeMS_adjusted SpeakerType AOI Determiner Phonol_Overlap N FixP sd se
## 1 -2670 1 Competitor Definite Minimal Pair 5 0 0 0
## 2 -2670 1 Distractor Definite Minimal Pair 5 0 0 0
## 3 -2670 1 Target Definite Minimal Pair 5 1 0 0
## 4 -2670 2 Competitor Definite Minimal Pair 4 0 0 0
## 5 -2670 2 Distractor Definite Minimal Pair 4 0 0 0
## 6 -2670 2 Target Definite Minimal Pair 4 1 0 0
## ci
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
summary(fix.means.phonol)
## TimeMS_adjusted SpeakerType AOI Determiner
## Min. :-2670.0 1:7941 Competitor:5488 Length:16464
## 1st Qu.: -890.0 2:8523 Distractor:5488 Class :character
## Median : 830.0 Target :5488 Mode :character
## Mean : 832.7
## 3rd Qu.: 2540.0
## Max. : 4750.0
##
## Phonol_Overlap N FixP sd
## NonMinPair :8229 Min. : 1.0 Min. :0.0000 Min. :0.0000
## Minimal Pair:8235 1st Qu.: 92.0 1st Qu.:0.1121 1st Qu.:0.3040
## Median :112.0 Median :0.3084 Median :0.4312
## Mean :133.3 Mean :0.3333 Mean :0.3841
## 3rd Qu.:190.0 3rd Qu.:0.3887 3rd Qu.:0.4739
## Max. :246.0 Max. :1.0000 Max. :0.7071
## NA's :384
## se ci
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0254 1st Qu.:0.0502
## Median :0.0348 Median :0.0686
## Mean :0.0374 Mean :0.0783
## 3rd Qu.:0.0461 3rd Qu.:0.0915
## Max. :0.5000 Max. :6.3531
## NA's :384 NA's :384
fix.means.fourgroups = Rmisc::summarySE(fix.long, measurevar='FixP', groupvars=c('TimeMS_adjusted','SpeakerType', 'AOI', 'Determiner', 'Phonol_Overlap', 'GenderMatch'), na.rm=T)
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
head(fix.means.fourgroups)
## TimeMS_adjusted SpeakerType AOI Determiner Phonol_Overlap GenderMatch
## 1 -2670 1 Competitor Definite Minimal Pair D=C
## 2 -2670 1 Distractor Definite Minimal Pair D=C
## 3 -2670 1 Target Definite Minimal Pair D=C
## 4 -2670 2 Competitor Definite Minimal Pair D=C
## 5 -2670 2 Distractor Definite Minimal Pair D=C
## 6 -2670 2 Target Definite Minimal Pair D=C
## N FixP sd se ci
## 1 5 0 0 0 0
## 2 5 0 0 0 0
## 3 5 1 0 0 0
## 4 4 0 0 0 0
## 5 4 0 0 0 0
## 6 4 1 0 0 0
summary(fix.means.fourgroups)
## TimeMS_adjusted SpeakerType AOI Determiner
## Min. :-2670 1:15756 Competitor:10768 Length:32304
## 1st Qu.: -900 2:16548 Distractor:10768 Class :character
## Median : 790 Target :10768 Mode :character
## Mean : 790
## 3rd Qu.: 2470
## Max. : 4750
##
## Phonol_Overlap GenderMatch N FixP
## NonMinPair :16155 D=C:16068 Min. : 1.00 Min. :0.0000
## Minimal Pair:16149 D=T:16236 1st Qu.: 44.00 1st Qu.:0.1143
## Median : 62.00 Median :0.3000
## Mean : 67.92 Mean :0.3333
## 3rd Qu.: 95.00 3rd Qu.:0.4054
## Max. :142.00 Max. :1.0000
##
## sd se ci
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.3043 1st Qu.:0.0354 1st Qu.:0.0708
## Median :0.4280 Median :0.0489 Median :0.0975
## Mean :0.3828 Mean :0.0516 Mean :0.1126
## 3rd Qu.:0.4764 3rd Qu.:0.0635 3rd Qu.:0.1276
## Max. :0.7071 Max. :0.5000 Max. :6.3531
## NA's :525 NA's :525 NA's :525
fix.means.gendermatch = Rmisc::summarySE(fix.long, measurevar='FixP', groupvars=c('TimeMS_adjusted','SpeakerType', 'AOI', 'Determiner', 'GenderMatch'), na.rm=T)
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
head(fix.means.gendermatch)
## TimeMS_adjusted SpeakerType AOI Determiner GenderMatch N FixP sd se ci
## 1 -2670 1 Competitor Definite D=C 5 0 0 0 0
## 2 -2670 1 Distractor Definite D=C 5 0 0 0 0
## 3 -2670 1 Target Definite D=C 5 1 0 0 0
## 4 -2670 2 Competitor Definite D=C 4 0 0 0 0
## 5 -2670 2 Distractor Definite D=C 4 0 0 0 0
## 6 -2670 2 Target Definite D=C 4 1 0 0 0
summary(fix.means.gendermatch)
## TimeMS_adjusted SpeakerType AOI Determiner GenderMatch
## Min. :-2670.0 1:7953 Competitor:5501 Length:16503 D=C:8202
## 1st Qu.: -890.0 2:8550 Distractor:5501 Class :character D=T:8301
## Median : 830.0 Target :5501 Mode :character
## Mean : 833.2
## 3rd Qu.: 2550.0
## Max. : 4750.0
##
## N FixP sd se
## Min. : 1.0 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:127.0 1st Qu.:0.1078 1st Qu.:0.2992 1st Qu.:0.0248
## Median :141.0 Median :0.3101 Median :0.4337 Median :0.0366
## Mean :132.9 Mean :0.3333 Mean :0.3830 Mean :0.0359
## 3rd Qu.:156.0 3rd Qu.:0.3816 3rd Qu.:0.4738 3rd Qu.:0.0406
## Max. :196.0 Max. :1.0000 Max. :0.7071 Max. :0.5000
## NA's :405 NA's :405
## ci
## Min. :0.0000
## 1st Qu.:0.0490
## Median :0.0723
## Mean :0.0754
## 3rd Qu.:0.0803
## Max. :6.3531
## NA's :405
fix.plot = ggplot() +
facet_wrap(~Determiner + SpeakerType, ncol=2,
labeller = labeller(
Determiner = c(
"Definite" = "Definite determiner",
"Possessive" = "Possessive determiner"
),
SpeakerType = c(
"1" = "ASI",
"2" = "HSS"
)
)
) + theme_bw() +
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
geom_line(data=fix.means,aes(x=TimeMS_adjusted, y=FixP, group=AOI, colour=AOI, lty=AOI), lwd=1.5) + # TODO use SE or CI here?
geom_ribbon(data=fix.means,aes(x=TimeMS_adjusted,ymin=FixP-se,ymax=FixP+se,color=AOI,fill=AOI), linewidth=.2, alpha=.3, lty="dashed", show.legend=F) +
labs(y="Fixation Proportion", x="Time relative to the target word onset (ms)") +
scale_x_continuous(limits=c(-300, 1400),expand=c(0,0),breaks=seq(-300, 1400, 200)) +
scale_color_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
scale_fill_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
scale_linetype_manual('AOI', values=c("solid","dotted", "dotdash")) +
theme(
text=element_text(size=20),
legend.key.height=unit(.3,"in"),
legend.key.width=unit(.6,"in"),
panel.spacing=unit(1, "in"))
fix.plot
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).
ggsave("plots/fixation_plot.png", width = 20, height = 15, dpi = 300)
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).
fix.plot.phonol = ggplot() +
facet_wrap(~Determiner + SpeakerType + Phonol_Overlap, ncol=4,
labeller = labeller(
Determiner = c(
"Definite" = "Definite determiner",
"Possessive" = "Possessive determiner"
),
SpeakerType = c(
"1" = "ASI",
"2" = "HSS"
),
Phonol_Overlap = c(
"Minimal Pair" = "Minimal Pair",
"NonMinPair" = "Non Minimal Pair"
)
)
) + theme_bw() +
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
geom_line(data=fix.means.phonol,aes(x=TimeMS_adjusted, y=FixP, group=AOI, colour=AOI, lty=AOI), lwd=1.5) + # TODO use SE or CI here?
geom_ribbon(data=fix.means.phonol,aes(x=TimeMS_adjusted,ymin=FixP-se,ymax=FixP+se,color=AOI,fill=AOI), linewidth=.2, alpha=.3, lty="dashed", show.legend=F) +
labs(y="Fixation Proportion", x="Time relative to the target word onset (ms)") +
scale_x_continuous(limits=c(-300, 1400),expand=c(0,0),breaks=seq(-300, 1400, 200)) +
scale_color_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
scale_fill_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
scale_linetype_manual('AOI', values=c("solid","dotted", "dotdash")) +
theme(
text=element_text(size=20),
legend.key.height=unit(.3,"in"),
legend.key.width=unit(.6,"in"),
panel.spacing=unit(1, "in"))
fix.plot.phonol
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).
ggsave("plots/fixation_plot_phonol.png", width = 40, height = 15, dpi = 300)
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).
fix.plot.fourgroups = ggplot() +
facet_wrap(~Determiner + SpeakerType + Phonol_Overlap + GenderMatch, ncol=4,
labeller = labeller(
Determiner = c(
"Definite" = "Definite determiner",
"Possessive" = "Possessive determiner"
),
SpeakerType = c(
"1" = "ASI",
"2" = "HSS"
),
Phonol_Overlap = c(
"Minimal Pair" = "Minimal Pair",
"NonMinPair" = "Non Minimal Pair"
),
GenderMatch = c(
"D=T" = "Distractor = Target",
"D=C" = "Distractor = Competitor"
)
)
) + theme_bw() +
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
geom_line(data=fix.means.fourgroups,aes(x=TimeMS_adjusted, y=FixP, group=AOI, colour=AOI, lty=AOI), lwd=1.5) + # TODO use SE or CI here?
geom_ribbon(data=fix.means.fourgroups,aes(x=TimeMS_adjusted,ymin=FixP-se,ymax=FixP+se,color=AOI,fill=AOI), linewidth=.2, alpha=.3, lty="dashed", show.legend=F) +
labs(y="Fixation Proportion", x="Time relative to the target word onset (ms)") +
scale_x_continuous(limits=c(-300, 1400),expand=c(0,0),breaks=seq(-300, 1400, 200)) +
scale_color_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
scale_fill_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
scale_linetype_manual('AOI', values=c("solid","dotted", "dotdash")) +
theme(
text=element_text(size=20),
legend.key.height=unit(.3,"in"),
legend.key.width=unit(.6,"in"),
panel.spacing=unit(1, "in"))
fix.plot.fourgroups
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).
ggsave("plots/fixation_plot_fourgroups.png", width = 40, height = 30, dpi = 300)
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).
fix.plot.gendermatch = ggplot() +
facet_wrap(~Determiner + SpeakerType + GenderMatch, ncol=4,
labeller = labeller(
Determiner = c(
"Definite" = "Definite determiner",
"Possessive" = "Possessive determiner"
),
SpeakerType = c(
"1" = "ASI",
"2" = "HSS"
),
GenderMatch = c(
"D=T" = "Distractor = Target",
"D=C" = "Distractor = Competitor"
)
)
) + theme_bw() +
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
geom_line(data=fix.means.gendermatch,aes(x=TimeMS_adjusted, y=FixP, group=AOI, colour=AOI, lty=AOI), lwd=1.5) + # TODO use SE or CI here?
geom_ribbon(data=fix.means.gendermatch,aes(x=TimeMS_adjusted,ymin=FixP-se,ymax=FixP+se,color=AOI,fill=AOI), linewidth=.2, alpha=.3, lty="dashed", show.legend=F) +
labs(y="Fixation Proportion", x="Time relative to the target word onset (ms)") +
scale_x_continuous(limits=c(-300, 1400),expand=c(0,0),breaks=seq(-300, 1400, 200)) +
scale_color_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
scale_fill_manual('AOI', values=c("#af2013","#002e65","#ffac00")) +
scale_linetype_manual('AOI', values=c("solid","dotted", "dotdash")) +
theme(
text=element_text(size=20),
legend.key.height=unit(.3,"in"),
legend.key.width=unit(.6,"in"),
panel.spacing=unit(1, "in"))
fix.plot.gendermatch
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).
ggsave("plots/fixation_plot_gendermatch.png", width = 40, height = 15, dpi = 300)
## Warning: Removed 1686 rows containing missing values or values outside the scale range
## (`geom_line()`).
fix.twovar <- fix.dat %>%
mutate(NonTarget = Competitor + Distractor) %>%
select(-Competitor, -Distractor)
fix.twovar <- fix.twovar %>%
mutate(AreaFixated = ifelse(AreaFixated == "Target", "Target", "NonTarget"))
fix.twovar$AreaFixated <- as.factor(fix.twovar$AreaFixated)
fix.twovar <- fix.twovar %>%
pivot_longer(
cols = c(Target, NonTarget),
names_to = "AOI",
values_to = "FixP"
)
fix.twovar = fix.twovar %>% mutate_at(vars(AOI), as.factor)
fix.twovar$Phonol_Overlap <- fix.twovar$Phonol_Overlap_T %>% fct_collapse(NonMinPair = c("First Syll-unstressed", "First Syll-stressed"))
summary(fix.twovar)
## ...1 TOI Participant Timeline
## Min. : 1 Length:1462700 Length:1462700 Length:1462700
## 1st Qu.:183183 Class :character Class :character Class :character
## Median :366326 Mode :character Mode :character Mode :character
## Mean :366328
## 3rd Qu.:549482
## Max. :732646
##
## Bin Bin_duration TimeMS Animacy_C
## Min. : 1.0 Min. :10 Min. : 10 Length:1462700
## 1st Qu.:180.0 1st Qu.:10 1st Qu.:1800 Class :character
## Median :342.0 Median :10 Median :3420 Mode :character
## Mean :337.6 Mean :10 Mean :3376
## 3rd Qu.:498.0 3rd Qu.:10 3rd Qu.:4980
## Max. :728.0 Max. :10 Max. :7280
##
## Animacy_D Animacy_T Determiner N_ending_C
## Length:1462700 Length:1462700 Length:1462700 Length:1462700
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## N_ending_D N_ending_T N_Gender_C N_Gender_D
## Length:1462700 Length:1462700 Length:1462700 Length:1462700
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## N_Gender_T Phonol_Overlap_C Phonol_Overlap_D Phonol_Overlap_T
## Length:1462700 Length:1462700 Mode:logical Length:1462700
## Class :character Class :character NA's:1462700 Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## ArticleOnset NounOnset EndingOnset EndingOffset Remaining
## Min. :2403 Min. :2496 Min. :2591 Min. :2838 Min. :3005
## 1st Qu.:2448 1st Qu.:2576 1st Qu.:2762 1st Qu.:3020 1st Qu.:3144
## Median :2479 Median :2613 Median :2859 Median :3079 Median :3230
## Mean :2490 Mean :2619 Mean :2877 Mean :3107 Mean :3239
## 3rd Qu.:2524 3rd Qu.:2669 3rd Qu.:2972 3rd Qu.:3196 3rd Qu.:3315
## Max. :2679 Max. :2807 Max. :3411 Max. :3546 Max. :3656
##
## SpeakerType Sex Age Position
## Min. :1.00 Length:1462700 Min. :19.00 Length:1462700
## 1st Qu.:1.00 Class :character 1st Qu.:20.00 Class :character
## Median :2.00 Mode :character Median :23.00 Mode :character
## Mean :1.54 Mean :23.75
## 3rd Qu.:2.00 3rd Qu.:25.00
## Max. :2.00 Max. :50.00
## NA's :284246
## Trackloss TimeMS_adjusted ArticleOnset_adjusted NounOnset_adjusted
## Mode :logical Min. :-2670.0 Min. :0 Min. : 70.0
## FALSE:1462700 1st Qu.: -690.0 1st Qu.:0 1st Qu.:113.0
## Median : 920.0 Median :0 Median :128.0
## Mean : 880.9 Mean :0 Mean :128.3
## 3rd Qu.: 2480.0 3rd Qu.:0 3rd Qu.:144.0
## Max. : 4750.0 Max. :0 Max. :202.0
##
## EndingOnset_adjusted EndingOffset_adjusted Remaining_adjusted
## Min. :170.0 Min. :365.0 Min. : 540.0
## 1st Qu.:276.0 1st Qu.:544.0 1st Qu.: 668.0
## Median :388.0 Median :593.0 Median : 738.0
## Mean :386.3 Mean :616.7 Mean : 748.9
## 3rd Qu.:459.0 3rd Qu.:694.0 3rd Qu.: 818.0
## Max. :773.0 Max. :908.0 Max. :1018.0
##
## GenderMatchTarget_D AreaFixated GenderMatch AOI
## Min. :0.0000 NonTarget:581038 Length:1462700 NonTarget:731350
## 1st Qu.:0.0000 Target :881662 Class :character Target :731350
## Median :1.0000 Mode :character
## Mean :0.5252
## 3rd Qu.:1.0000
## Max. :1.0000
##
## FixP Phonol_Overlap
## Min. :0.0 NonMinPair :976078
## 1st Qu.:0.0 Minimal Pair:486622
## Median :0.5
## Mean :0.5
## 3rd Qu.:1.0
## Max. :1.0
##
head(fix.twovar)
## # A tibble: 6 × 42
## ...1 TOI Participant Timeline Bin Bin_duration TimeMS Animacy_C Animacy_D
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 1 Tria… TA105 VersionA 23 10 230 Animate-… Animate-…
## 2 1 Tria… TA105 VersionA 23 10 230 Animate-… Animate-…
## 3 2 Tria… TA105 VersionA 24 10 240 Animate-… Animate-…
## 4 2 Tria… TA105 VersionA 24 10 240 Animate-… Animate-…
## 5 3 Tria… TA105 VersionA 25 10 250 Animate-… Animate-…
## 6 3 Tria… TA105 VersionA 25 10 250 Animate-… Animate-…
## # ℹ 33 more variables: Animacy_T <chr>, Determiner <chr>, N_ending_C <chr>,
## # N_ending_D <chr>, N_ending_T <chr>, N_Gender_C <chr>, N_Gender_D <chr>,
## # N_Gender_T <chr>, Phonol_Overlap_C <chr>, Phonol_Overlap_D <lgl>,
## # Phonol_Overlap_T <chr>, ArticleOnset <dbl>, NounOnset <dbl>,
## # EndingOnset <dbl>, EndingOffset <dbl>, Remaining <dbl>, SpeakerType <dbl>,
## # Sex <chr>, Age <dbl>, Position <chr>, Trackloss <lgl>,
## # TimeMS_adjusted <dbl>, ArticleOnset_adjusted <dbl>, …
fix.means.twovar = Rmisc::summarySE(fix.twovar, measurevar='FixP', groupvars=c('TimeMS_adjusted','SpeakerType', 'AOI', 'Determiner'), na.rm=T)
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
fix.means.twovar = fix.means.twovar %>% mutate_at(vars(SpeakerType, Determiner), as.factor)
summary(fix.means.twovar)
## TimeMS_adjusted SpeakerType AOI Determiner
## Min. :-2670.0 1:2674 NonTarget:2793 Definite :2792
## 1st Qu.: -900.0 2:2912 Target :2793 Possessive:2794
## Median : 850.0
## Mean : 853.9
## 3rd Qu.: 2590.0
## Max. : 4750.0
##
## N FixP sd se
## Min. : 1.0 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:256.0 1st Qu.:0.2891 1st Qu.:0.3629 1st Qu.:0.02152
## Median :285.0 Median :0.5000 Median :0.4597 Median :0.02706
## Mean :261.9 Mean :0.5000 Mean :0.4136 Mean :0.02904
## 3rd Qu.:303.0 3rd Qu.:0.7109 3rd Qu.:0.4802 3rd Qu.:0.02918
## Max. :364.0 Max. :1.0000 Max. :0.5774 Max. :0.33333
## NA's :88 NA's :88
## ci
## Min. :0.00000
## 1st Qu.:0.04236
## Median :0.05324
## Mean :0.06087
## 3rd Qu.:0.05745
## Max. :1.43422
## NA's :88
head(fix.means.twovar)
## TimeMS_adjusted SpeakerType AOI Determiner N FixP sd se ci
## 1 -2670 1 NonTarget Definite 5 0 0 0 0
## 2 -2670 1 Target Definite 5 1 0 0 0
## 3 -2670 2 NonTarget Definite 4 0 0 0 0
## 4 -2670 2 Target Definite 4 1 0 0 0
## 5 -2660 1 NonTarget Definite 5 0 0 0 0
## 6 -2660 1 Target Definite 5 1 0 0 0
fix.plot.twovar = ggplot() +
facet_wrap(~Determiner + SpeakerType, ncol=2,
labeller = labeller(
Determiner = c(
"Definite" = "Definite determiner",
"Possessive" = "Possessive determiner"
),
SpeakerType = c(
"1" = "ASI",
"2" = "HSS"
)
)
) + theme_bw() +
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.8) +
geom_line(data=fix.means.twovar,aes(x=TimeMS_adjusted, y=FixP, group=AOI, colour=AOI, lty=AOI), lwd=1.5) +
geom_ribbon(data=fix.means.twovar,aes(x=TimeMS_adjusted,ymin=FixP-se,ymax=FixP+se,color=AOI,fill=AOI), linewidth=.2, alpha=.3, lty="dashed", show.legend=F) +
labs(y="Fixation Proportion", x="Time relative to the target word onset (ms)") +
scale_x_continuous(limits=c(-300, 1400),expand=c(0,0),breaks=seq(-300, 1400, 200)) +
scale_color_manual('AOI', values=c("#969798","#ffac00")) +
scale_fill_manual('AOI', values=c("#969798","#ffac00")) +
scale_linetype_manual('AOI', values=c("solid","dotted")) +
theme(
text=element_text(size=20),
legend.key.height=unit(.3,"in"),
legend.key.width=unit(.6,"in"),
panel.spacing=unit(1, "in"))
fix.plot.twovar
## Warning: Removed 1124 rows containing missing values or values outside the scale range
## (`geom_line()`).
ggsave("plots/fixation_plot_twovar.png", width = 20, height = 15, dpi = 300)
## Warning: Removed 1124 rows containing missing values or values outside the scale range
## (`geom_line()`).
Here we will run a by-participant analysis testing the divergence between the Target and NonTarget AOIs.
div.dat = fix.twovar %>%
filter(1400>=TimeMS_adjusted & TimeMS_adjusted>=-300) %>%
dplyr::mutate(StrataVars=paste(Participant, AOI, TimeMS_adjusted, sep='')) %>%
mutate_at(vars(SpeakerType, StrataVars),as.factor) %>%
droplevels()
summary(div.dat)
## ...1 TOI Participant Timeline
## Min. : 85 Length:393148 Length:393148 Length:393148
## 1st Qu.:187259 Class :character Class :character Class :character
## Median :372068 Mode :character Mode :character Mode :character
## Mean :369144
## 3rd Qu.:553190
## Max. :732480
##
## Bin Bin_duration TimeMS Animacy_C
## Min. :211.0 Min. :10 Min. :2110 Length:393148
## 1st Qu.:265.0 1st Qu.:10 1st Qu.:2650 Class :character
## Median :307.0 Median :10 Median :3070 Mode :character
## Mean :306.1 Mean :10 Mean :3061
## 3rd Qu.:347.0 3rd Qu.:10 3rd Qu.:3470
## Max. :408.0 Max. :10 Max. :4080
##
## Animacy_D Animacy_T Determiner N_ending_C
## Length:393148 Length:393148 Length:393148 Length:393148
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## N_ending_D N_ending_T N_Gender_C N_Gender_D
## Length:393148 Length:393148 Length:393148 Length:393148
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## N_Gender_T Phonol_Overlap_C Phonol_Overlap_D Phonol_Overlap_T
## Length:393148 Length:393148 Mode:logical Length:393148
## Class :character Class :character NA's:393148 Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## ArticleOnset NounOnset EndingOnset EndingOffset Remaining
## Min. :2403 Min. :2496 Min. :2591 Min. :2838 Min. :3005
## 1st Qu.:2450 1st Qu.:2576 1st Qu.:2762 1st Qu.:3020 1st Qu.:3144
## Median :2479 Median :2614 Median :2871 Median :3079 Median :3243
## Mean :2491 Mean :2620 Mean :2879 Mean :3108 Mean :3240
## 3rd Qu.:2524 3rd Qu.:2669 3rd Qu.:2974 3rd Qu.:3196 3rd Qu.:3315
## Max. :2679 Max. :2807 Max. :3411 Max. :3546 Max. :3656
##
## SpeakerType Sex Age Position
## 1:183908 Length:393148 Min. :19.00 Length:393148
## 2:209240 Class :character 1st Qu.:20.00 Class :character
## Mode :character Median :23.00 Mode :character
## Mean :23.79
## 3rd Qu.:25.00
## Max. :50.00
## NA's :77128
## Trackloss TimeMS_adjusted ArticleOnset_adjusted NounOnset_adjusted
## Mode :logical Min. :-300.0 Min. :0 Min. : 70.0
## FALSE:393148 1st Qu.: 160.0 1st Qu.:0 1st Qu.:113.0
## Median : 580.0 Median :0 Median :128.0
## Mean : 564.7 Mean :0 Mean :128.4
## 3rd Qu.: 980.0 3rd Qu.:0 3rd Qu.:144.0
## Max. :1400.0 Max. :0 Max. :202.0
##
## EndingOnset_adjusted EndingOffset_adjusted Remaining_adjusted
## Min. :170.0 Min. :365 Min. : 540.0
## 1st Qu.:276.0 1st Qu.:544 1st Qu.: 668.0
## Median :388.0 Median :594 Median : 739.0
## Mean :387.4 Mean :617 Mean : 749.4
## 3rd Qu.:459.0 3rd Qu.:694 3rd Qu.: 818.0
## Max. :773.0 Max. :908 Max. :1018.0
##
## GenderMatchTarget_D AreaFixated GenderMatch AOI
## Min. :0.0000 NonTarget:192712 Length:393148 NonTarget:196574
## 1st Qu.:0.0000 Target :200436 Class :character Target :196574
## Median :1.0000 Mode :character
## Mean :0.5224
## 3rd Qu.:1.0000
## Max. :1.0000
##
## FixP Phonol_Overlap StrataVars
## Min. :0.0 NonMinPair :260806 TA141NonTarget890 : 48
## 1st Qu.:0.0 Minimal Pair:132342 TA141NonTarget960 : 48
## Median :0.5 TA141Target890 : 48
## Mean :0.5 TA141Target960 : 48
## 3rd Qu.:1.0 TA105NonTarget1080: 47
## Max. :1.0 TA105NonTarget1110: 47
## (Other) :392862
head(div.dat)
## # A tibble: 6 × 43
## ...1 TOI Participant Timeline Bin Bin_duration TimeMS Animacy_C Animacy_D
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 85 Tria… TA105 VersionA 324 10 3240 Animate-… Animate-…
## 2 85 Tria… TA105 VersionA 324 10 3240 Animate-… Animate-…
## 3 86 Tria… TA105 VersionA 325 10 3250 Animate-… Animate-…
## 4 86 Tria… TA105 VersionA 325 10 3250 Animate-… Animate-…
## 5 87 Tria… TA105 VersionA 326 10 3260 Animate-… Animate-…
## 6 87 Tria… TA105 VersionA 326 10 3260 Animate-… Animate-…
## # ℹ 34 more variables: Animacy_T <chr>, Determiner <chr>, N_ending_C <chr>,
## # N_ending_D <chr>, N_ending_T <chr>, N_Gender_C <chr>, N_Gender_D <chr>,
## # N_Gender_T <chr>, Phonol_Overlap_C <chr>, Phonol_Overlap_D <lgl>,
## # Phonol_Overlap_T <chr>, ArticleOnset <dbl>, NounOnset <dbl>,
## # EndingOnset <dbl>, EndingOffset <dbl>, Remaining <dbl>, SpeakerType <fct>,
## # Sex <chr>, Age <dbl>, Position <chr>, Trackloss <lgl>,
## # TimeMS_adjusted <dbl>, ArticleOnset_adjusted <dbl>, …
div.dat.def <- div.dat %>% filter(Determiner == "Definite")
div.dat.pos <- div.dat %>% filter(Determiner == "Possessive")
This ended up yielding mostly NAs due to the small sample size.
Define a function that runs the bootstrap function for all the trials
run_boot_per_trial <- function(toi_id, df, Niter = 100, strata_col = "StrataVars") {
trial_data <- df %>% filter(TOI == toi_id) %>% droplevels()
boot_result <- boot::boot(
data = trial_data,
statistic = boot_L1L2_glm, # your divergence function
strata = trial_data[[strata_col]],
R = Niter
)
# Extract metadata from the first row (assuming Determiner etc. are constant per trial)
trial_info <- trial_data %>%
slice(1) %>%
select(TOI, Determiner, Phonol_Overlap, GenderMatch) # add more if needed
list(boot = boot_result, info = trial_info)
}
Run the function that runs the bootstrap function
trial_ids <- unique(div.dat$TOI)
# Store both the bootstrap and metadata
boot_trials <- lapply(trial_ids, function(toi) run_boot_per_trial(toi, div.dat, Niter = 100))
names(boot_trials) <- trial_ids
Extract divergence values + metadata into a summary dataframe
# Function to extract means and metadata
extract_div_summary <- function(result) {
bootres <- result$boot
info <- result$info
tibble::tibble(
TOI = info$TOI,
Determiner = info$Determiner,
Phonol_Overlap = info$Phonol_Overlap,
GenderMatch = info$GenderMatchTarget_D,
ASI_divergence = mean(bootres$t[,2], na.rm = TRUE) * 10 - 10,
HSS_divergence = mean(bootres$t[,3], na.rm = TRUE) * 10 - 10
)
}
# Build summary
div_summary_df <- purrr::map_dfr(boot_trials, extract_div_summary)
Bootstrap function using glm (generalized linear model)
boot_L1L2_glm = function(original_data, resample_indices){
dat = original_data[resample_indices, ]
# a statistical test at each timepoint for each group
test_g1 = dat %>% # t-test for L1 group
subset(SpeakerType == 1) %>% group_by(TimeMS_adjusted) %>%
dplyr::summarise(t = summary(glm(FixP ~ AOI, family = binomial))$coefficients["AOITarget", "z value"])
test_g2 = dat %>% # t-test for L2 group
subset(SpeakerType == 2) %>% group_by(TimeMS_adjusted) %>%
dplyr::summarise(t = summary(glm(FixP ~ AOI, family = binomial))$coefficients["AOITarget", "z value"])
# return a TRUE/FALSE vector of significant positive t-scores
# (positive means more looks to the target than unrelated)
t_g1 = test_g1$t > 1.96
t_g2 = test_g2$t > 1.96
# create empty vectors to store onsets
onset_g1 = onset_g2 = c()
# find the index of the earliest run of sequential TRUEs; threshold is Nbins
for (i in 1:(length(t_g2)-Nbins)) {
onset_g1[i] = sum(t_g1[i:(i+Nbins-1)]) == Nbins
onset_g2[i] = sum(t_g2[i:(i+Nbins-1)]) == Nbins
}
# find the difference between onsets
delta_g1g2 = which(onset_g2)[1] - which(onset_g1)[1]
# print
# note: the bootstrap returns the indices of the respective timepoints, not absolute times.
# The annotations to the right of each index (e.g. t[,1]) indicate where in the boot object the bootstrapped onset distributions can be found.
c(delta_g1g2, # onset difference L1 vs. L2 t[,1]
which(onset_g1)[1], # onset bin for looks to target L1 t[,2]
which(onset_g2)[1]) # onset bin for looks to target L2 t[,3]
}
Run the glm bootstrap on the two subsets by determiner type
bootres_L1L2_glm_def = boot::boot(
data = div.dat.def, # data set to bootstrap
statistic = boot_L1L2_glm, # bootstrap function
strata = div.dat.def$StrataVars, # stratification variable
R = Niter) # number of iterations
bootres_L1L2_glm_pos = boot::boot(
data = div.dat.pos, # data set to bootstrap
statistic = boot_L1L2_glm, # bootstrap function
strata = div.dat.pos$StrataVars, # stratification variable
R = Niter) # number of iterations
bootres_L1L2_glm_def
bootres_L1L2_glm_pos
Save the glm results
save(bootres_L1L2_glm_def, file="DPA_results.glm.def.rds")
save(bootres_L1L2_glm_pos, file="DPA_results.glm.pos.rds")
Load the lmer results
load("DPA_results.lmer.def.rds")
load("DPA_results.lmer.pos.rds")
Load the glm results
load("DPA_results.glm.def.rds")
load("DPA_results.glm.pos.rds")
Define function to extract divergence points and confidence intervals from bootstrapping results
get_divergence_ms <- function(bootres, index) {
# index = 2 for ASI, 3 for HSS
mean_bin <- mean(bootres$t[, index], na.rm = TRUE)
ci_bin <- boot::boot.ci(bootres, index = index, type = "perc")$percent[4:5]
mean_ms <- (mean_bin - 1) * 10
ci_ms <- (ci_bin - 1) * 10
list(mean = mean_ms, ci = ci_ms)
}
Run the function on all the results
# Definite determiner
def_asi <- get_divergence_ms(bootres_L1L2_lmer_def, index = 2)
def_hss <- get_divergence_ms(bootres_L1L2_lmer_def, index = 3)
# Possessive determiner
pos_asi <- get_divergence_ms(bootres_L1L2_lmer_pos, index = 2)
pos_hss <- get_divergence_ms(bootres_L1L2_lmer_pos, index = 3)
Create dataframe for plotting
div_df <- tibble::tibble(
Determiner = c("Definite", "Definite", "Possessive", "Possessive"),
SpeakerType = c("1", "2", "1", "2"),
TimeMS = c(def_asi$mean, def_hss$mean, pos_asi$mean, pos_hss$mean),
CI_low = c(def_asi$ci[1], def_hss$ci[1], pos_asi$ci[1], pos_hss$ci[1]),
CI_high = c(def_asi$ci[2], def_hss$ci[2], pos_asi$ci[2], pos_hss$ci[2])
)
div_df = div_df %>% mutate_at(vars(SpeakerType, Determiner), as.factor)
div_df <- div_df %>%
mutate(label = paste0(TimeMS, " ms"))
Add divergence points and their confidence intervals to the two-variable plots
fix.plot.twovar +
geom_point(data = div_df,
aes(x = TimeMS, y = 0.48),
shape = 18, size = 4, color = "black") +
geom_errorbarh(data = div_df,
aes(xmin = CI_low, xmax = CI_high, y = 0.48),
height = 0.02, color = "black", linewidth = 0.6) +
geom_text(data = div_df,
aes(x = TimeMS - 50, y = 0.55, label = label),
size = 10, fontface="bold")
## Warning: Removed 1124 rows containing missing values or values outside the scale range
## (`geom_line()`).
Save the plots
ggsave("plots/fixation_plot_twovar_divpts.png", width = 20, height = 15, dpi = 300)
## Warning: Removed 1124 rows containing missing values or values outside the scale range
## (`geom_line()`).
We can compute a p-value for the group comparison by creating a bootstrap distribution of the null hypothesis. To do this, we will first pool the original data from both groups, randomly assign group labels, and then estimate a difference in divergence point. We will repeat this many times to obtain a distribution of divergence point that could be expected under the null hypothesis. We will obtain the p-value by calculating the proportion of samples from this null distribution that are larger than the observed difference in divergence point in the empirical data.
Below, we will calculate a p-value for the group comparison (ASI vs. HSS group). We will first define a bootstrap function.
boot_L1L2_pval = function(original_data, resample_indices){
dat_resample = original_data[resample_indices, ] # resample the data
dat = dat_resample %>%
group_by(Participant, AOI, TimeMS_adjusted) %>%
transform(SpeakerType=sample(SpeakerType,replace=F)) %>% # randomly assign group labels
ungroup() #%>%
# a statistical test at each timepoint for each group
test_g1 = dat %>% # t-test for L1 group
subset(SpeakerType == 1) %>% group_by(TimeMS_adjusted) %>%
dplyr::summarise(t = summary(lmer(FixP ~ AOI + (1|Participant)))$coefficients["AOITarget", "t value"])
test_g2 = dat %>% # t-test for L2 group
subset(SpeakerType == 2) %>% group_by(TimeMS_adjusted) %>%
dplyr::summarise(t = summary(lmer(FixP ~ AOI + (1|Participant)))$coefficients["AOITarget", "t value"])
# return a TRUE/FALSE vector of significant positive t-scores
# (positive means more looks to the target than unrelated)
t_g1 = test_g1$t > 1.96
t_g2 = test_g2$t > 1.96
# create empty vectors to store onsets
onset_g1 = onset_g2 = c()
# find the index of the earliest run of 4 sequential TRUEs
for (i in 1:(length(t_g2)-Nbins)) {
onset_g1[i] = sum(t_g1[i:(i+Nbins-1)]) == Nbins
onset_g2[i] = sum(t_g2[i:(i+Nbins-1)]) == Nbins
}
# find the difference between onsets
delta_g1g2 = which(onset_g2)[1] - which(onset_g1)[1]
# print
delta_g1g2 # onset difference L1 vs. L2 t[,1]
}
bootres_L1L2_pval_def = boot::boot(
data = div.dat.def, # data set to bootstrap
statistic = boot_L1L2_pval, # bootstrap function
strata = div.dat.def$StrataVars, # stratification variable
R = Niter) # number of iterations
bootres_L1L2_pval_pos = boot::boot(
data = div.dat.pos, # data set to bootstrap
statistic = boot_L1L2_pval, # bootstrap function
strata = div.dat.pos$StrataVars, # stratification variable
R = Niter) # number of iterations
bootres_L1L2_pval_def
bootres_L1L2_pval_pos
save(bootres_L1L2_pval, file="DPA_results.pval.def.rds")
save(bootres_L1L2_pval, file="DPA_results.pval.pos.rds")
load("DPA_results.pval.def.rds")
load("DPA_results.pval.pos.rds")
pval.def <- round(mean(bootres_L1L2_pval_def$t[,1]>=bootres_L1L2_lmer_def$t0[1], na.rm=T), 5)
pval.pos <- round(mean(bootres_L1L2_pval_pos$t[,1]>=bootres_L1L2_lmer_pos$t0[1], na.rm=T), 5)
pval.def
pval.pos